#-------------------------------
#Purpose: Read AEA and EGAP registry: number of PAPs over time by registry
#input files:aer_registry012020.csv,egap_registry012020.csv
#output file:paps_registered_overtime1.pdf,paps_registered_overtime2.pdf
#Created:September 17, 2019 
#R_version:R version 4.0.1 (2020-06-06) -- "See Things Now"
#-------------------------------

#load libraries
library(tidyverse)
library(stringr)
library(lubridate)
library(ggthemes)


#set working directory

setwd("")


#read registries

aea <-read_csv("aer_registry012020.csv")
egap <-read_csv("egap_registry012020.csv")


#-select relevant variables in registries

##aea
aea_sub <- select(aea, RCT_ID, `First registered on`)

#head(aea_sub)
##rename variables to match with EGAP registry
aea_sub <- mutate(aea_sub, Registry= "AEA",date=ymd(`First registered on`), `Year of registration`=year(date))


#egap
#names(egap)
egap <- dplyr::select(egap,ID)

egap <- as_tibble(egap)
#head(egap)

egap<-mutate(egap, `First registered on`=ymd(str_sub(egap$ID,start = 1,end = 8)),date =`First registered on`, `Year of registration`=year(date),Registry="EGAP")

names(egap)[1]<- "RCT_ID"
head(egap)

## combine aea and egap datasets
master_dat <- bind_rows(aea_sub,egap)

dim(master_dat)
names(master_dat)

##generate time series

#table(master_dat$`Year of registration`)

sum_dat <- master_dat %>%
  group_by(Registry, `Year of registration`)%>%
  summarize(studies=n())

sum_dat_full <- master_dat %>%
  group_by(`Year of registration`)%>%
  summarize(studies=n())

sum_dat_full <-mutate(sum_dat_full,Registry="Total")

pltdat <- bind_rows(sum_dat_full,sum_dat)


ggplot(pltdat ,aes(x=`Year of registration`,y=studies,linetype=Registry))+
  geom_line(lwd=1)+
  scale_x_continuous(name="Year",breaks = seq(from=2011,to=2019,by=1),labels=as.character(seq(from=2011,to=2019,by=1)))+
  labs(x="Year",y="Frequency",linetype="Registry")+
  theme_tufte()+
  theme(axis.title = element_text(face = "bold",size = 28,colour = "black"),legend.title = element_text(face = "bold",size = 20),legend.key =element_rect(fill = "white",color = "white"),axis.ticks = element_line(size = 2),panel.background =element_blank(),axis.line = element_line(size = 1, linetype = "solid"),title = element_text(family = "serif",size=13,colour = "black",face = "bold"), axis.text.y = element_text(family = "serif",size=24,colour = "black",face = "bold"),axis.text.x = element_text(family = "serif",size=24,colour = "black",face = "bold"),strip.text = element_text(size=16, face="bold"),legend.text =element_text(family = "serif",size=16,colour = "black"),panel.grid.minor = element_line(colour="grey", size=0.5))
  

#ggsave("paps_registered_overtime1.pdf")
  





